/* Initialize STATA */

set more off
clear all

** Set directories and paths
local raw_data_file_path = "...\data\raw_data"
local data_file_path = "...\data"
local data_file_name = "intangibles_compustat" 
local output_file_path = "...\results"
local top_brands_count = 100 // set the counts of top brands to take from each year. 


********************************************************************************************************
/* Parameters */

cd "`data_file_path'"
*************************************************************************************************************

use intangibles_compustat

g brand_frequency = BRAND +   BRANDS  

drop if PERMNO == . 
drop if brand_frequency == .

replace brand_frequency = brand_frequency/totalfilewords

duplicates drop PERMNO fyear, force

distinct PERMNO 

summ brand_frequency, d
count if brand_frequency > 0 & brand_frequency != .
bysort fyear: count if brand_frequency > 0 & brand_frequency != .


*drop companies with brands in their name
replace brand_frequency = 0 if strpos(company_name,"BRANDS")> 0 | strpos(company_name,"BRAND")> 0 | strpos(company_name,"Brand")> 0 | strpos(company_name,"Brands")> 0

/* Compustat */

keep if fyear_end_month == 12

drop cusip
drop ip
ren *, lower
g year = fyear



	* Exclude financial firms and firms whose sic code is missing
	
	destring sic, replace force
	destring sich, replace force

	replace sich = sic if mi(sich)				/* Use historical SIC as default but replace it with the current one if it is missing */
	
	* Fama-French 17 industry classifications

	ffind sich, newvar(ffind17) type(17)
	


* Keep Unique Observation

bys permno year (fyr): keep if _n==_N

drop if missing(frequency) 

summ brand_frequency, d
tab year if brand_frequency > 0
distinct gvkey if brand_frequency >0 
distinct gvkey if brand_frequency >1 

bys year: egen quint_mkvalt = xtile(mkvalt), nq(5) 

bysort year: egen brand_rank = rank(-brand_frequency)   


g top_brand = 0
replace top_brand = 1 if brand_rank <= `top_brands_count' 

summ brand_frequency, d

order permno conm top_brand brand_rank


cd "`data_file_path'"	
	

* for top brands portfolio based on brand capital

g BK_pf = top_brand
	
* Set year and save file

g myear = year + 1 			/* To merge with CRSP */	

keep permno year myear brand_frequency ffind17 sich top_brand BK_pf brand_rank conm file company_name ffi frequency totalfilewords advertisement advertisements advertising brand branded branding brandname brands mkvalt at 
	

sort permno year

cd "`output_file_path'"
save CCM, replace

/* CRSP Monthly file */
cd "`raw_data_file_path'"
use crsp, clear

rename *, lower
keep if shrcd==10 | shrcd==11
keep if exchcd==1 | exchcd==2 | exchcd==3


* Set Date

g t = mofd(date)
format t %tm
order t

replace divamt = 0 if mi(divamt) | divamt < 0
bys permno t: egen aux = sum(divamt)
bys permno t: keep if _n==_N
replace divamt = aux
drop aux

* Turn bid-ask quotes into price

replace prc = abs(prc) 
drop if prc < 1
g ME = prc*shrout

bys permno (t): g ME_L1 = ME[_n-1] if t==t[_n-1]+1


g month = month(date)
g myear = year(date)

/* Merge CRSP with Compustat */
cd "`output_file_path'"

merge m:1 permno myear using CCM
keep if _merge == 3
drop _merge

erase CCM.dta


sort permno t

*****************************
cd "`output_file_path'"
save dataset_regs, replace

foreach vw in 0 1 2 { // 0 is equal weighted, 1 value weighted, 2 capped
cd "`output_file_path'"
use dataset_regs, clear

* Sample period

keep if myear >= 1996 & myear <= 2020

order t myear year month
sort myear month
replace p80 = p80*1000

if `vw' == 2{
	replace ME_L1 = p80 if ME_L1 > p80 & ME_L1 != . 
}

* Construct value-weight return or capped value weighted if it was identified before.
if `vw' != 0 {
g ME_L1xret = ME_L1*ret
g ME_L1_Iret = ME_L1 if ~mi(ret)

collapse (sum) ME_L1xret ME_L1_Iret, by(t BK_pf)

drop if BK_pf == .
g ret = ME_L1xret/ME_L1_Iret*100		/* Percentage term */ 	
}



* Construct value-weight return
if `vw' == 0{

collapse (mean) ret , by(t BK_pf)

drop if BK_pf == .
replace ret = ret*100		/* Percentage term */ 	
}

* Merge Fama-French 3 factor and Carhart Momentum factors

preserve


cd "`raw_data_file_path'"
 import excel "Factors.xlsx", sheet("Factors") firstrow clear


rename *, lower
g year = int(date/100)
g month = mod(date, 100)
g t = ym(year, month)
format t %tm

keep t mktrf smb hml mom rf rmw cma qrf qmkt qme qia qroe qeg aqrmkt aqrsmb aqrhml aqrumd aqrbab aqrqmj


cd "`output_file_path'"

tempfile ff
save `ff'

restore

merge m:1 t using `ff', nogen keep(1 3)
sort BK_pf t


g exret = ret - rf		/* Excess return */


* Reshpae data

keep BK_pf t ret exret 
reshape wide ret exret, i(t) j(BK_pf)
tsset t

merge 1:1 t using `ff', nogen keep(1 3)


if `vw' == 1{
	local weight = "VW"
}
if `vw' == 2{
	local weight = "CVW"
}
if `vw' == 0{
	local weight = "EW"
}




** for top brands using brand capital: one dummey: either top brand or not.

g BMK = exret1 - exret0
g cons = 1
label var cons "$ E[R] - r_f (\%)$"

eststo: newey exret1 mktrf smb hml mom, lag(1)
eststo: newey exret1 mktrf smb hml mom rmw cma, lag(1)
eststo: newey exret1 qmkt qme qia qroe , lag(1)
eststo: newey exret1 qmkt qme qia qroe qeg , lag(1)
eststo: newey exret1 aqrmkt aqrsmb aqrhml aqrumd aqrbab aqrqmj , lag(1)

 
esttab using "top_brands_pf_`weight'.csv", replace ///
		t(2) r2 b(2) star(* 0.10 ** 0.05 *** 0.01) ///
		width(\textwidth) label nomtitle nolines nogap noeqlines type compress
eststo clear

*** save the return file
keep t exret1 
rename exret1 portfolio_ret_`weight', replace

save portfolio_ret_`weight', replace
}
}

